#define TIMER_FREQ 1193182
#endif
u_int timer_freq = TIMER_FREQ;
+struct mtx clock_lock;
+
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
#include <machine/ctrl_if.h>
#include <machine/evtchn.h>
+/*
+ * Extra ring macros to sync a consumer index up to the public producer index.
+ * Generally UNSAFE, but we use it for recovery and shutdown in some cases.
+ */
+#define RING_DROP_PENDING_REQUESTS(_r) \
+ do { \
+ (_r)->req_cons = (_r)->sring->req_prod; \
+ } while (0)
+#define RING_DROP_PENDING_RESPONSES(_r) \
+ do { \
+ (_r)->rsp_cons = (_r)->sring->rsp_prod; \
+ } while (0)
/*
* Only used by initial domain which must create its own control-interface
* event channel. This value is picked up by the user-space domain controller
static int * ctrl_if_wchan = &ctrl_if_evtchn;
-static CONTROL_RING_IDX ctrl_if_tx_resp_cons;
-static CONTROL_RING_IDX ctrl_if_rx_req_cons;
+static ctrl_front_ring_t ctrl_if_tx_ring;
+static ctrl_back_ring_t ctrl_if_rx_ring;
/* Incoming message requests. */
/* Primary message type -> message handler. */
TASKQUEUE_DEFINE(ctrl_if_txB, NULL, NULL, {});
struct taskqueue **taskqueue_ctrl_if_tx[2] = { &taskqueue_ctrl_if_txA,
&taskqueue_ctrl_if_txB };
-int ctrl_if_idx;
+static int ctrl_if_idx = 0;
static struct task ctrl_if_rx_tasklet;
static struct task ctrl_if_tx_tasklet;
#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048))
-#define TX_FULL(_c) \
- (((_c)->tx_req_prod - ctrl_if_tx_resp_cons) == CONTROL_RING_SIZE)
static void
ctrl_if_notify_controller(void)
static void
__ctrl_if_tx_tasklet(void *context __unused, int pending __unused)
{
- control_if_t *ctrl_if = get_ctrl_if();
ctrl_msg_t *msg;
- int was_full = TX_FULL(ctrl_if);
+ int was_full = RING_FULL(&ctrl_if_tx_ring);
+ RING_IDX i, rp;
+
+ i = ctrl_if_tx_ring.rsp_cons;
+ rp = ctrl_if_tx_ring.sring->rsp_prod;
+ rmb(); /* Ensure we see all responses up to 'rp'. */
- while ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod )
+ for ( ; i != rp; i++ )
{
- msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if_tx_resp_cons)];
+ msg = RING_GET_RESPONSE(&ctrl_if_tx_ring, i);
/* Execute the callback handler, if one was specified. */
if ( msg->id != 0xFF )
ctrl_if_txmsg_id_mapping[msg->id].fn = NULL;
}
- /*
- * Step over the message in the ring /after/ finishing reading it. As
- * soon as the index is updated then the message may get blown away.
- */
- smp_mb();
- ctrl_if_tx_resp_cons++;
}
- if ( was_full && !TX_FULL(ctrl_if) )
+ /*
+ * Step over the message in the ring /after/ finishing reading it. As
+ * soon as the index is updated then the message may get blown away.
+ */
+ smp_mb();
+ ctrl_if_tx_ring.rsp_cons = i;
+
+ if ( was_full && !RING_FULL(&ctrl_if_tx_ring) )
{
wakeup(ctrl_if_wchan);
/* bump idx so future enqueues will occur on the next taskq
* process any currently pending tasks
*/
- ctrl_if_idx++;
+ ctrl_if_idx++;
taskqueue_run(*taskqueue_ctrl_if_tx[(ctrl_if_idx-1) & 1]);
}
+
}
static void
__ctrl_if_rxmsg_deferred_task(void *context __unused, int pending __unused)
{
ctrl_msg_t *msg;
+ CONTROL_RING_IDX dp;
- while ( ctrl_if_rxmsg_deferred_cons != ctrl_if_rxmsg_deferred_prod )
+ dp = ctrl_if_rxmsg_deferred_prod;
+ rmb(); /* Ensure we see all deferred requests up to 'dp'. */
+
+ while ( ctrl_if_rxmsg_deferred_cons != dp )
{
msg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
ctrl_if_rxmsg_deferred_cons++)];
(*ctrl_if_rxmsg_handler[msg->type])(msg, 0);
}
+
}
static void
__ctrl_if_rx_tasklet(void *context __unused, int pending __unused)
{
- control_if_t *ctrl_if = get_ctrl_if();
ctrl_msg_t msg, *pmsg;
+ CONTROL_RING_IDX dp;
+ RING_IDX rp, i;
+
+ i = ctrl_if_rx_ring.req_cons;
+ rp = ctrl_if_rx_ring.sring->req_prod;
+ dp = ctrl_if_rxmsg_deferred_prod;
- while ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
+ rmb(); /* Ensure we see all requests up to 'rp'. */
+
+ for ( ; i != rp; i++)
{
- pmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)];
+ pmsg = RING_GET_REQUEST(&ctrl_if_rx_ring, i);
memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg));
+
+ if ( msg.length > sizeof(msg.msg))
+ msg.length = sizeof(msg.msg);
if ( msg.length != 0 )
memcpy(msg.msg, pmsg->msg, msg.length);
if ( test_bit(msg.type, &ctrl_if_rxmsg_blocking_context) )
{
- pmsg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
- ctrl_if_rxmsg_deferred_prod++)];
- memcpy(pmsg, &msg, offsetof(ctrl_msg_t, msg) + msg.length);
- taskqueue_enqueue(taskqueue_thread, &ctrl_if_rxmsg_deferred_task);
+ memcpy(&ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(dp++)],
+ &msg, offsetof(ctrl_msg_t, msg) + msg.length);
}
else
{
(*ctrl_if_rxmsg_handler[msg.type])(&msg, 0);
}
}
+ ctrl_if_rx_ring.req_cons = i;
+
+ if ( dp != ctrl_if_rxmsg_deferred_prod )
+ {
+ wmb();
+ ctrl_if_rxmsg_deferred_prod = dp;
+ taskqueue_enqueue(taskqueue_thread, &ctrl_if_rxmsg_deferred_task);
+ }
+
}
static void
ctrl_if_interrupt(void *ctrl_sc)
/* (int irq, void *dev_id, struct pt_regs *regs) */
{
- control_if_t *ctrl_if = get_ctrl_if();
- if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod )
+
+ if ( RING_HAS_UNCONSUMED_RESPONSES(&ctrl_if_tx_ring) )
taskqueue_enqueue(taskqueue_swi, &ctrl_if_tx_tasklet);
- if ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
+ if ( RING_HAS_UNCONSUMED_REQUESTS(&ctrl_if_rx_ring) )
taskqueue_enqueue(taskqueue_swi, &ctrl_if_rx_tasklet);
+
}
int
ctrl_msg_handler_t hnd,
unsigned long id)
{
- control_if_t *ctrl_if = get_ctrl_if();
unsigned long flags;
+ ctrl_msg_t *dmsg;
int i;
mtx_lock_irqsave(&ctrl_if_lock, flags);
- if ( TX_FULL(ctrl_if) )
+ if ( RING_FULL(&ctrl_if_tx_ring) )
{
mtx_unlock_irqrestore(&ctrl_if_lock, flags);
return EAGAIN;
msg->id = i;
}
- memcpy(&ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if->tx_req_prod)],
- msg, sizeof(*msg));
- wmb(); /* Write the message before letting the controller peek at it. */
- ctrl_if->tx_req_prod++;
+ dmsg = RING_GET_REQUEST(&ctrl_if_tx_ring,
+ ctrl_if_tx_ring.req_prod_pvt);
+ memcpy(dmsg, msg, sizeof(*msg));
+ ctrl_if_tx_ring.req_prod_pvt++;
+ RING_PUSH_REQUESTS(&ctrl_if_tx_ring);
mtx_unlock_irqrestore(&ctrl_if_lock, flags);
long wait_state)
{
int rc, sst = 0;
-
+
/* Fast path. */
- if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
- return rc;
-
-
+ if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
+ goto done;
+
for ( ; ; )
{
if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
break;
- if ( sst != 0)
- return EINTR;
+ if ( sst != 0) {
+ rc = EINTR;
+ goto done;
+ }
sst = tsleep(ctrl_if_wchan, PWAIT|PCATCH, "ctlrwt", 10);
}
-
+ done:
+
return rc;
}
int
ctrl_if_enqueue_space_callback(struct task *task)
{
- control_if_t *ctrl_if = get_ctrl_if();
/* Fast path. */
- if ( !TX_FULL(ctrl_if) )
+ if ( !RING_FULL(&ctrl_if_tx_ring) )
return 0;
(void)taskqueue_enqueue(*taskqueue_ctrl_if_tx[(ctrl_if_idx & 1)], task);
* certainly return 'not full'.
*/
smp_mb();
- return TX_FULL(ctrl_if);
+ return RING_FULL(&ctrl_if_tx_ring);
}
void
ctrl_if_send_response(ctrl_msg_t *msg)
{
- control_if_t *ctrl_if = get_ctrl_if();
unsigned long flags;
ctrl_msg_t *dmsg;
* In this situation we may have src==dst, so no copying is required.
*/
mtx_lock_irqsave(&ctrl_if_lock, flags);
- dmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if->rx_resp_prod)];
+ dmsg = RING_GET_RESPONSE(&ctrl_if_rx_ring,
+ ctrl_if_rx_ring.rsp_prod_pvt);
if ( dmsg != msg )
memcpy(dmsg, msg, sizeof(*msg));
- wmb(); /* Write the message before letting the controller peek at it. */
- ctrl_if->rx_resp_prod++;
+
+ ctrl_if_rx_ring.rsp_prod_pvt++;
+ RING_PUSH_RESPONSES(&ctrl_if_rx_ring);
+
mtx_unlock_irqrestore(&ctrl_if_lock, flags);
ctrl_if_notify_controller();
{
unsigned long _flags;
int inuse;
-
+
mtx_lock_irqsave(&ctrl_if_lock, _flags);
inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler);
}
mtx_unlock_irqrestore(&ctrl_if_lock, _flags);
-
+
return !inuse;
}
unbind_evtchn_from_irq(ctrl_if_evtchn);
}
+#if 0
/** Reset the control interface progress pointers.
* Marks the queues empty if 'clear' non-zero.
*/
ctrl_if_rx_req_cons = ctrl_if->rx_resp_prod;
}
-
+#endif
void
ctrl_if_resume(void)
{
+ control_if_t *ctrl_if = get_ctrl_if();
+
+ TRACE_ENTER;
if ( xen_start_info->flags & SIF_INITDOMAIN )
{
/*
initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2;
}
- ctrl_if_reset(0);
+
+ /* Sync up with shared indexes. */
+ FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring);
+ BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring);
ctrl_if_evtchn = xen_start_info->domain_controller_evtchn;
ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn);
*/
intr_add_handler("ctrl-if", ctrl_if_irq, (driver_intr_t*)ctrl_if_interrupt,
- NULL, INTR_TYPE_NET | INTR_MPSAFE, NULL);
+ NULL, INTR_TYPE_NET, NULL);
+ TRACE_EXIT;
+ /* XXX currently assuming not MPSAFE */
}
static void
ctrl_if_init(void *dummy __unused)
{
+ control_if_t *ctrl_if = get_ctrl_if();
+
int i;
for ( i = 0; i < 256; i++ )
ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
+ FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring);
+ BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring);
+
mtx_init(&ctrl_if_lock, "ctrlif", NULL, MTX_SPIN | MTX_NOWITNESS);
TASK_INIT(&ctrl_if_tx_tasklet, 0, __ctrl_if_tx_tasklet, NULL);
TASK_INIT(&ctrl_if_rxmsg_deferred_task, 0, __ctrl_if_rxmsg_deferred_task, NULL);
- ctrl_if_reset(1);
+
ctrl_if_resume();
}
int
ctrl_if_transmitter_empty(void)
{
- return (get_ctrl_if()->tx_req_prod == ctrl_if_tx_resp_cons);
+ return (ctrl_if_tx_ring.sring->req_prod == ctrl_if_tx_ring.rsp_cons);
}
void
ctrl_if_discard_responses(void)
{
- ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod;
+ RING_DROP_PENDING_RESPONSES(&ctrl_if_tx_ring);
}
SYSINIT(ctrl_if_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, ctrl_if_init, NULL);
{
unsigned long l1, l2;
unsigned int l1i, l2i, port;
- int irq, owned;
+ int irq;
unsigned long flags;
shared_info_t *s = HYPERVISOR_shared_info;
+ vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
local_irq_save(flags);
{
s->vcpu_data[0].evtchn_upcall_pending = 0;
/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
- l1 = xen_xchg(&s->evtchn_pending_sel, 0);
+ l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0);
while ( (l1i = ffs(l1)) != 0 )
{
l1i--;
l2 &= ~(1 << l2i);
port = (l1i << 5) + l2i;
- if ((owned = mtx_owned(&sched_lock)) != 0)
- mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
if ( (irq = evtchn_to_irq[port]) != -1 ) {
struct intsrc *isrc = intr_lookup_source(irq);
intr_execute_handlers(isrc, frame);
-
} else {
evtchn_device_upcall(port);
}
- if (owned)
- mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
}
}
}
};
#endif
-
+#if 0
static void
misdirect_interrupt(void *sc)
{
}
-
+#endif
void irq_suspend(void)
{
int virq, irq, evtchn;
}
#endif
+#if 0
(void) intr_add_handler("xb_mis", bind_virq_to_irq(VIRQ_MISDIRECT),
(driver_intr_t *)misdirect_interrupt,
NULL, INTR_TYPE_MISC, NULL);
+
+#endif
}
SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
#include "assym.s"
.section __xen_guest
- .asciz "LOADER=generic,GUEST_VER=5.2.1,XEN_VER=2.0,BSD_SYMTAB"
+ .asciz "LOADER=generic,GUEST_VER=5.3,XEN_VER=3.0,BSD_SYMTAB"
/*
#ifndef SMP
static struct pcpu __pcpu;
#endif
-
-static void
-map_range(void *physptr, unsigned long physptrindex,
- unsigned long physindex, int count, unsigned int flags) {
- int i;
- unsigned long pte, ppa;
- for (i = 0; i < count; i++) {
- pte = ((unsigned long)physptr) + (physptrindex << 2) + (i << 2);
- ppa = (PTOM(physindex + i) << PAGE_SHIFT) | flags | PG_V | PG_A;
- xpq_queue_pt_update((pt_entry_t *)pte, ppa);
- }
- mcl_flush_queue();
-}
+struct mtx icu_lock;
struct mem_range_softc mem_range_softc;
pmap_bootstrap((init_first)<< PAGE_SHIFT, 0);
for (i = 0; i < 10; i++)
phys_avail[i] = 0;
-#ifdef MAXMEM
- if (MAXMEM/4 < Maxmem)
- Maxmem = MAXMEM/4;
-#endif
physmem = Maxmem;
avail_end = ptoa(Maxmem) - round_page(MSGBUF_SIZE);
phys_avail[0] = init_first << PAGE_SHIFT;
phys_avail[1] = avail_end;
}
-extern pt_entry_t *KPTphys;
-extern int kernbase;
+extern unsigned long cpu0prvpage;
+extern unsigned long *SMPpt;
pteinfo_t *pteinfo_list;
unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
+int preemptable;
+int gdt_set;
/* Linux infection */
#define PAGE_OFFSET KERNBASE
xendebug_flags = 0xffffffff;
/* pre-zero unused mapped pages */
bzero((char *)(KERNBASE + (tmpindex << PAGE_SHIFT)), (1024 - tmpindex)*PAGE_SIZE);
-
- KPTphys = (pt_entry_t *)xpmap_ptom(__pa(startinfo->pt_base + PAGE_SIZE));
IdlePTD = (pd_entry_t *)xpmap_ptom(__pa(startinfo->pt_base));
XENPRINTF("IdlePTD %p\n", IdlePTD);
XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
xen_start_info->flags, xen_start_info->pt_base,
xen_start_info->mod_start, xen_start_info->mod_len);
+ /* setup self-referential mapping first so vtomach will work */
+ xpq_queue_pt_update(IdlePTD + PTDPTDI , (unsigned long)IdlePTD |
+ PG_V | PG_A);
+ mcl_flush_queue();
/* Map proc0's UPAGES */
proc0uarea = (struct user *)(KERNBASE + (tmpindex << PAGE_SHIFT));
tmpindex += UAREA_PAGES;
/* allocate page for ldt */
ldt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT));
tmpindex++;
+#ifdef SMP
+ /* allocate cpu0 private page */
+ cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT));
+ tmpindex++;
+
+ /* allocate SMP page table */
+ SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+
+ /* Map the private page into the SMP page table */
+ SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A;
+
+ /* map SMP page table RO */
+ PT_SET_MA(SMPpt, vtomach(SMPpt) & ~PG_RW, TRUE);
+
+ /* put the page table into the pde */
+ xpq_queue_pt_update(IdlePTD + MPPTDI, xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_M | PG_RW | PG_V | PG_A);
+
+ tmpindex++;
+#endif
#ifdef PMAP_DEBUG
pteinfo_list = (pteinfo_t *)(KERNBASE + (tmpindex << PAGE_SHIFT));
PT_CLEAR(KERNBASE + (i << PAGE_SHIFT), TRUE);
/* allocate remainder of NKPT pages */
- map_range(IdlePTD, KPTDI + 1, tmpindex, NKPT-1, PG_U | PG_M | PG_RW);
+ for (i = 0; i < NKPT-1; i++, tmpindex++)
+ xpq_queue_pt_update(IdlePTD + KPTDI + i + 1, xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_M | PG_RW | PG_V | PG_A);
tmpindex += NKPT-1;
- map_range(IdlePTD, PTDPTDI, __pa(xen_start_info->pt_base) >> PAGE_SHIFT, 1, 0);
- xpq_queue_pt_update(KPTphys + tmpindex, xen_start_info->shared_info | PG_A | PG_V | PG_RW);
+
+
+ tmpindex += NKPT-1;
+ PT_UPDATES_FLUSH();
+
HYPERVISOR_shared_info = (shared_info_t *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+ PT_SET_MA(HYPERVISOR_shared_info, xen_start_info->shared_info | PG_A | PG_V | PG_RW | PG_M, TRUE);
tmpindex++;
- mcl_flush_queue();
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine;
- HYPERVISOR_shared_info->arch.mfn_to_pfn_start = (unsigned long)xen_machine_phys;
init_first = tmpindex;
{
int gsel_tss, metadata_missing, off, x, error;
struct pcpu *pc;
+ unsigned long gdtmachpfn;
trap_info_t trap_table[] = {
{ 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
{ 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16)));
#endif
#ifdef SMP
+ /* this correspond to the cpu private page as mapped into the SMP page
+ * table in initvalues
+ */
pc = &SMP_prvspace[0].pcpu;
gdt_segs[GPRIV_SEL].ssd_limit =
atop(sizeof(struct privatespace) - 1);
gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
for (x = 0; x < NGDT; x++)
ssdtosd(&gdt_segs[x], &gdt[x].sd);
- /* re-map GDT read-only */
- {
- unsigned long gdtindex = (((unsigned long)gdt - KERNBASE) >> PAGE_SHIFT);
- unsigned long gdtphys = PTOM(gdtindex);
- map_range(KPTphys, gdtindex, gdtindex, 1, 0);
- mcl_flush_queue();
- if (HYPERVISOR_set_gdt(&gdtphys, LAST_RESERVED_GDT_ENTRY + 1)) {
- panic("set_gdt failed\n");
- }
- lgdt_finish();
+
+ PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW, TRUE);
+ gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
+ if (HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1)) {
+ XENPRINTF("set_gdt failed\n");
+
}
+ lgdt_finish();
+ gdt_set = 1;
if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
panic("set_trap_table failed - error %d\n", error);
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
PCPU_SET(curpcb, thread0.td_pcb);
- PCPU_SET(trap_nesting, 0);
PCPU_SET(pdir, (unsigned long)IdlePTD);
/*
* Initialize mutexes.
*/
mutex_init();
+ mtx_init(&clock_lock, "clk", NULL, MTX_SPIN);
+ mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
+
+
+
/* make ldt memory segments */
/*
* XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it
default_proc_ldt.ldt_base = (caddr_t)ldt;
default_proc_ldt.ldt_len = 6;
_default_ldt = (int)&default_proc_ldt;
- PCPU_SET(currentldt, _default_ldt);
- {
- unsigned long ldtindex = (((unsigned long)ldt - KERNBASE) >> PAGE_SHIFT);
- map_range(KPTphys, ldtindex, ldtindex, 1, 0);
- mcl_flush_queue();
- xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
- }
-
+ PCPU_SET(currentldt, _default_ldt);
+ PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW, TRUE);
+ xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
+
+
/*
* Initialize the console before we print anything out.
*/
KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+#if 0
private_tss = 0;
PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
+#endif
HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), PCPU_GET(common_tss.tss_esp0));
+
dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
PT_UPDATES_FLUSH();
/* safe to enable xen page queue locking */
- xpq_init();
msgbufinit(msgbufp, MSGBUF_SIZE);
/* XXX KMM I don't think we need call gates */
mtx_unlock_spin(&smp_rv_mtx);
else
critical_exit();
+ PT_UPDATES_FLUSH();
}
void
mtx_unlock_spin(&smp_rv_mtx);
else
critical_exit();
+ PT_UPDATES_FLUSH();
}
void
mtx_unlock_spin(&smp_rv_mtx);
else
critical_exit();
+ PT_UPDATES_FLUSH();
}
#else /* !SMP */
/*
#endif
#include <machine/xenfunc.h>
-
+#if 0
#ifdef SMP
static void cpu_reset_proxy(void);
static u_int cpu_reset_proxyid;
static volatile u_int cpu_reset_proxy_active;
#endif
+#endif
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
* Force reset the processor by invalidating the entire address space!
*/
+#if 0
#ifdef SMP
static void
cpu_reset_proxy()
stop_cpus((1<<cpu_reset_proxyid));
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
- cpu_reset_real();
+ cpu_reset();
}
#endif
-
+#endif
void
cpu_reset()
{
(void)HYPERVISOR_console_write(buf, ret);
}
-#define XPQUEUE_SIZE 2048
-
-typedef struct xpq_queue {
- uint32_t ptr;
- uint32_t val;
-} xpq_queue_t;
-
-#define MCLQUEUE_SIZE 512
+#define XPQUEUE_SIZE 128
+
+#define MCLQUEUE_SIZE 32
+#ifdef SMP
+/* per-cpu queues and indices */
+static multicall_entry_t mcl_queue[MAX_VIRT_CPUS][MCLQUEUE_SIZE];
+static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE];
+static int mcl_idx[MAX_VIRT_CPUS];
+static int xpq_idx[MAX_VIRT_CPUS];
+
+#define MCL_QUEUE mcl_queue[vcpu]
+#define XPQ_QUEUE xpq_queue[vcpu]
+#define MCL_IDX mcl_idx[vcpu]
+#define XPQ_IDX xpq_idx[vcpu]
+#define SET_VCPU() int vcpu = smp_processor_id()
+#else
static multicall_entry_t mcl_queue[MCLQUEUE_SIZE];
+static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static int mcl_idx = 0;
-
-static xpq_queue_t xpq_queue[XPQUEUE_SIZE];
-static boolean_t xpq_initialized;
-static struct mtx update_lock;
static int xpq_idx = 0;
-/*
- * Don't attempt to lock until after lock & memory initialization
- */
-#define XPQ_LOCK(lock, flags) \
- if (likely(xpq_initialized)) \
- mtx_lock_irqsave(lock, flags)
-#define XPQ_UNLOCK(lock, flags) \
- if (likely(xpq_initialized)) \
- mtx_unlock_irqrestore(lock, flags)
+#define MCL_QUEUE mcl_queue
+#define XPQ_QUEUE xpq_queue
+#define MCL_IDX mcl_idx
+#define XPQ_IDX xpq_idx
+#define SET_VCPU()
+#endif
+#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
+#define MCL_IDX_INC atomic_add_int(&MCL_IDX, 1);
-void
-xpq_init(void)
-{
- xpq_initialized = TRUE;
- mtx_init(&update_lock, "mmu", "MMU LOCK", MTX_SPIN);
-}
static __inline void
_xpq_flush_queue(void)
{
- int _xpq_idx = xpq_idx;
- int error, i;
+ SET_VCPU();
+ int _xpq_idx = XPQ_IDX;
+ int error, i;
+ /* window of vulnerability here? */
- xpq_idx = 0;
- /* Make sure index is cleared first to avoid double updates. */
- error = HYPERVISOR_mmu_update((mmu_update_t *)xpq_queue, _xpq_idx,
- NULL);
-
- if (__predict_false(error < 0)) {
- for (i = 0; i < _xpq_idx; i++)
- printk("val: %x ptr: %p\n", xpq_queue[i].val, xpq_queue[i].ptr);
- panic("Failed to execute MMU updates: %d", error);
- }
+ XPQ_IDX = 0;
+ /* Make sure index is cleared first to avoid double updates. */
+ error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
+ _xpq_idx, NULL);
+
+ if (__predict_false(error < 0)) {
+ for (i = 0; i < _xpq_idx; i++)
+ printk("val: %x ptr: %p\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
+ panic("Failed to execute MMU updates: %d", error);
+ }
}
static void
xpq_flush_queue(void)
{
- unsigned long flags = 0;
+ SET_VCPU();
- XPQ_LOCK(&update_lock, flags);
- if (xpq_idx != 0) _xpq_flush_queue();
- XPQ_UNLOCK(&update_lock, flags);
+ if (XPQ_IDX != 0) _xpq_flush_queue();
}
static __inline void
_mcl_flush_queue(void)
{
- int _mcl_idx = mcl_idx;
- mcl_idx = 0;
- (void)HYPERVISOR_multicall(mcl_queue, _mcl_idx);
+ SET_VCPU();
+ int _mcl_idx = MCL_IDX;
+
+ MCL_IDX = 0;
+ (void)HYPERVISOR_multicall(&MCL_QUEUE, _mcl_idx);
}
void
mcl_flush_queue(void)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- if (__predict_true(mcl_idx != 0)) _mcl_flush_queue();
- XPQ_UNLOCK(&update_lock, flags);
- /* XXX: until we can remove the pervasive
- * __HYPERVISOR_update_va_mapping calls, we have 2 queues. In order
- * to ensure that they never get out of sync, only 1 flush interface
- * is provided.
- */
- xpq_flush_queue();
+
+ if (__predict_true(mcl_idx != 0)) _mcl_flush_queue();
+ /* XXX: until we can remove the pervasive
+ * __HYPERVISOR_update_va_mapping calls, we have 2 queues. In order
+ * to ensure that they never get out of sync, only 1 flush interface
+ * is provided.
+ */
+ xpq_flush_queue();
}
static __inline void
xpq_increment_idx(void)
{
- xpq_idx++;
- if (__predict_false(xpq_idx == XPQUEUE_SIZE))
+ SET_VCPU();
+
+ XPQ_IDX++;
+ if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
xpq_flush_queue();
}
static __inline void
mcl_increment_idx(void)
{
- mcl_idx++;
- if (__predict_false(mcl_idx == MCLQUEUE_SIZE))
+ SET_VCPU();
+ MCL_IDX++;
+
+ if (__predict_false(MCL_IDX == MCLQUEUE_SIZE))
mcl_flush_queue();
}
void
xpq_queue_invlpg(vm_offset_t va)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+ XPQ_QUEUE[XPQ_IDX].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
+ XPQ_QUEUE[XPQ_IDX].val = MMUEXT_INVLPG;
+ xpq_increment_idx();
}
void
load_cr3(uint32_t val)
{
- xpq_queue_pt_switch(val);
- xpq_flush_queue();
+ xpq_queue_pt_switch(val);
+ xpq_flush_queue();
}
void
xen_set_ldt(vm_offset_t base, uint32_t entries)
{
- xpq_queue_set_ldt(base, entries);
- _xpq_flush_queue();
+ xpq_queue_set_ldt(base, entries);
+ _xpq_flush_queue();
}
void
xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
- unsigned long flags = 0;
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- xpq_queue[xpq_idx].val = pfn;
- xpq_increment_idx();
- _xpq_flush_queue();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+ XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ XPQ_QUEUE[XPQ_IDX].val = pfn;
+ xpq_increment_idx();
+ _xpq_flush_queue();
}
void
xpq_queue_pt_update(pt_entry_t *ptr, pt_entry_t val)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = (uint32_t)ptr;
- xpq_queue[xpq_idx].val = val;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+ XPQ_QUEUE[XPQ_IDX].ptr = (memory_t)ptr;
+ XPQ_QUEUE[XPQ_IDX].val = (memory_t)val;
+ xpq_increment_idx();
}
void
#if 0
printf("setting va %x to ma %x\n", va, ma);
#endif
- unsigned long flags = 0;
- XPQ_LOCK(&update_lock, flags);
- mcl_queue[mcl_idx].op = __HYPERVISOR_update_va_mapping;
- mcl_queue[mcl_idx].args[0] = (unsigned long)(va >> PAGE_SHIFT);
- mcl_queue[mcl_idx].args[1] = (unsigned long)ma;
- mcl_queue[mcl_idx].args[2] = 0;
- mcl_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+ MCL_QUEUE[MCL_IDX].op = __HYPERVISOR_update_va_mapping;
+ MCL_QUEUE[MCL_IDX].args[0] = (unsigned long)va;
+ MCL_QUEUE[MCL_IDX].args[1] = (unsigned long)ma;
+ MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG;
+ mcl_increment_idx();
}
void
xpq_queue_pt_switch(uint32_t val)
{
- unsigned long flags = 0;
- vm_paddr_t ma = xpmap_ptom(val) & PG_FRAME;
-
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = ma | MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ vm_paddr_t ma = xpmap_ptom(val) & PG_FRAME;
+ SET_VCPU();
+
+ XPQ_QUEUE[XPQ_IDX].ptr = ma | MMU_EXTENDED_COMMAND;
+ XPQ_QUEUE[XPQ_IDX].val = MMUEXT_NEW_BASEPTR;
+ xpq_increment_idx();
}
void
xpq_queue_pin_table(uint32_t pa, int type)
{
- unsigned long flags = 0;
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
- switch (type) {
- case XPQ_PIN_L1_TABLE:
- xpq_queue[xpq_idx].val = MMUEXT_PIN_L1_TABLE;
- break;
- case XPQ_PIN_L2_TABLE:
- xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
- break;
- }
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+
+ XPQ_QUEUE[XPQ_IDX].ptr = pa | MMU_EXTENDED_COMMAND;
+ switch (type) {
+ case XPQ_PIN_L1_TABLE:
+ XPQ_QUEUE[XPQ_IDX].val = MMUEXT_PIN_L1_TABLE;
+ break;
+ case XPQ_PIN_L2_TABLE:
+ XPQ_QUEUE[XPQ_IDX].val = MMUEXT_PIN_L2_TABLE;
+ break;
+ }
+ xpq_increment_idx();
}
void
xpq_queue_unpin_table(uint32_t pa)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+ XPQ_QUEUE[XPQ_IDX].ptr = pa | MMU_EXTENDED_COMMAND;
+ XPQ_QUEUE[XPQ_IDX].val = MMUEXT_UNPIN_TABLE;
+ xpq_increment_idx();
}
void
xpq_queue_set_ldt(vm_offset_t va, uint32_t entries)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- KASSERT(va == (va & PG_FRAME), ("ldt not page aligned"));
- xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
- xpq_queue[xpq_idx].val = MMUEXT_SET_LDT |
- (entries << MMUEXT_CMD_SHIFT);
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+ KASSERT(va == (va & PG_FRAME), ("ldt not page aligned"));
+ XPQ_QUEUE[XPQ_IDX].ptr = MMU_EXTENDED_COMMAND | va;
+ XPQ_QUEUE[XPQ_IDX].val = MMUEXT_SET_LDT |
+ (entries << MMUEXT_CMD_SHIFT);
+ xpq_increment_idx();
}
void
xpq_queue_tlb_flush()
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
-
- xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+ XPQ_QUEUE[XPQ_IDX].ptr = MMU_EXTENDED_COMMAND;
+ XPQ_QUEUE[XPQ_IDX].val = MMUEXT_TLB_FLUSH;
+ xpq_increment_idx();
}
#ifndef __ASM_EVTCHN_H__
#define __ASM_EVTCHN_H__
-
+#include <machine/pcpu.h>
#include <machine/hypervisor.h>
#include <machine/synch_bitops.h>
#include <machine/hypervisor-ifs.h>
+#ifdef SMP
+#include <sys/param.h> /* XXX for time.h */
+#include <sys/time.h> /* XXX for pcpu.h */
+#include <sys/pcpu.h> /* XXX for PCPU_GET */
+extern int gdt_set;
+static inline int
+smp_processor_id(void)
+{
+ if (likely(gdt_set))
+ return PCPU_GET(cpuid);
+ return 0;
+}
+
+#else
+#define smp_processor_id() 0
+#endif
+
/*
* LOW-LEVEL DEFINITIONS
*/
unmask_evtchn(int port)
{
shared_info_t *s = HYPERVISOR_shared_info;
+ vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
synch_clear_bit(port, &s->evtchn_mask[0]);
* a real IO-APIC we 'lose the interrupt edge' if the channel is masked.
*/
if ( synch_test_bit (port, &s->evtchn_pending[0]) &&
- !synch_test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
+ !synch_test_and_set_bit(port>>5, &vcpu_info->evtchn_pending_sel) )
{
s->vcpu_data[0].evtchn_upcall_pending = 1;
if ( !s->vcpu_data[0].evtchn_upcall_mask )
*/
#ifdef SMP
-#define MPPTDI (NPDEPTD-1) /* per cpu ptd entry */
-#define KPTDI (MPPTDI-NKPDE-XEN_PAGES /* start of kernel virtual pde's */
+#define MPPTDI (NPDEPTD-1-XEN_PAGES) /* per cpu ptd entry */
+#define KPTDI (MPPTDI-NKPDE) /* start of kernel virtual pde's */
#else
#define KPTDI (NPDEPTD-NKPDE-XEN_PAGES) /* start of kernel virtual pde's */
#endif /* SMP */
#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0)
-#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI-1, 0)
+#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0)
#define USRSTACK VM_MAXUSER_ADDRESS
#ifndef _OS_H_
#define _OS_H_
+#include <machine/param.h>
#ifndef NULL
#define NULL (void *)0
/* some function prototypes */
void trap_init(void);
+extern int preemptable;
+#define preempt_disable() (preemptable = 0)
+#define preempt_enable() (preemptable = 1)
+#define preempt_enable_no_resched() (preemptable = 1)
+
/*
* STI/CLI equivalents. These basically set and clear the virtual
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
-#define __cli() \
-do { \
- HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \
- barrier(); \
+
+
+#define __cli() \
+do { \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
} while (0)
-#define __sti() \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- barrier(); \
- _shared->vcpu_data[0].evtchn_upcall_mask = 0; \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
+#define __sti() \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 0; \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
} while (0)
+
#define __save_flags(x) \
do { \
- (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \
+ vcpu_info_t *_vcpu; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
} while (0)
-#define __restore_flags(x) \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- barrier(); \
- if ( (_shared->vcpu_data[0].evtchn_upcall_mask = (x)) == 0 ) { \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
- } \
+#define __restore_flags(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
+ } else \
+ preempt_enable_no_resched(); \
} while (0)
-#define __save_and_cli(x) \
-do { \
- (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \
- HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \
- barrier(); \
-} while (0)
-#define __save_and_sti(x) \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- barrier(); \
- (x) = _shared->vcpu_data[0].evtchn_upcall_mask; \
- _shared->vcpu_data[0].evtchn_upcall_mask = 0; \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
+#define __save_and_cli(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
} while (0)
-#ifdef SMP
-/* extra macros need for the SMP case */
-#error "global_irq_* not defined"
-#endif
#define cli() __cli()
#define sti() __sti()
#define save_flags(x) __save_flags(x)
#define restore_flags(x) __restore_flags(x)
#define save_and_cli(x) __save_and_cli(x)
-#define save_and_sti(x) __save_and_sti(x)
#define local_irq_save(x) __save_and_cli(x)
-#define local_irq_set(x) __save_and_sti(x)
#define local_irq_restore(x) __restore_flags(x)
#define local_irq_disable() __cli()
#define local_irq_enable() __sti()
#define mb()
#define rmb()
-#define smp_mb()
#define wmb()
-
+#ifdef SMP
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define smp_read_barrier_depends() read_barrier_depends()
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_read_barrier_depends() do { } while(0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
/* This is a barrier for the compiler only, NOT the processor! */
static int blkif_control_rsp_valid;
static blkif_response_t blkif_control_rsp;
-static unsigned long xb_rec_ring_free;
-blkif_request_t xb_rec_ring[BLKIF_RING_SIZE]; /* shadow recovery ring */
+static blkif_front_ring_t blk_ring;
+
+static unsigned long rec_ring_free;
+blkif_request_t rec_ring[RING_SIZE(&blk_ring)]; /* shadow recovery ring */
/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64
static struct mtx blkif_io_lock;
-static blkif_ring_t *xb_blk_ring;
-static BLKIF_RING_IDX xb_resp_cons; /* Response consumer for comms ring. */
-static BLKIF_RING_IDX xb_req_prod; /* Private request producer */
static int xb_recovery = 0; /* "Recovery in progress" flag. Protected
* by the blkif_io_lock */
-/* We plug the I/O ring if the driver is suspended or if the ring is full. */
-#define BLKIF_RING_FULL (((xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE) || \
- (blkif_state != BLKIF_STATE_CONNECTED))
void blkif_completion(blkif_request_t *req);
void xb_response_intr(void *);
static inline int
GET_ID_FROM_FREELIST( void )
{
- unsigned long free = xb_rec_ring_free;
+ /* Pop the head of the shadow-ring free list. */
+ unsigned long free = rec_ring_free;
- KASSERT(free <= BLKIF_RING_SIZE, ("free %lu > BLKIF_RING_SIZE", free));
+ /* 'free' is used below as an index into rec_ring[RING_SIZE], so it
+ * must be strictly less than the ring size; '==' is already OOB. */
+ KASSERT(free < RING_SIZE(&blk_ring), ("free %lu >= RING_SIZE", free));
- xb_rec_ring_free = xb_rec_ring[free].id;
+ rec_ring_free = rec_ring[free].id;
- xb_rec_ring[free].id = 0x0fffffee; /* debug */
+ rec_ring[free].id = 0x0fffffee; /* poison the popped slot (debug aid) */
 return free;
}
static inline void
ADD_ID_TO_FREELIST( unsigned long id )
{
- xb_rec_ring[id].id = xb_rec_ring_free;
- xb_rec_ring_free = id;
+ rec_ring[id].id = rec_ring_free;
+ rec_ring_free = id;
}
static inline void translate_req_to_pfn(blkif_request_t *xreq,
static inline void flush_requests(void)
{
- xb_blk_ring->req_prod = xb_req_prod;
+ RING_PUSH_REQUESTS(&blk_ring);
notify_via_evtchn(blkif_evtchn);
}
struct xb_softc *sc = NULL;
struct bio *bp;
blkif_response_t *bret;
- BLKIF_RING_IDX i, rp;
+ RING_IDX i, rp;
unsigned long flags;
- if (blkif_state == BLKIF_STATE_CLOSED)
- return;
-
mtx_lock_irqsave(&blkif_io_lock, flags);
if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
return;
}
- rp = xb_blk_ring->resp_prod;
+ rp = blk_ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
/* sometimes we seem to lose i/o. stay in the interrupt handler while
* there is stuff to process: continually recheck the response producer.
*/
- for ( i = xb_resp_cons; i != (rp = xb_blk_ring->resp_prod); i++ ) {
+ for ( i = blk_ring.rsp_cons; i != (rp = blk_ring.sring->rsp_prod); i++ ) {
unsigned long id;
- bret = &xb_blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
+ bret = RING_GET_RESPONSE(&blk_ring, i);
id = bret->id;
- bp = (struct bio *)xb_rec_ring[id].id;
+ bp = (struct bio *)rec_ring[id].id;
- blkif_completion(&xb_rec_ring[id]);
+ blkif_completion(&rec_ring[id]);
ADD_ID_TO_FREELIST(id); /* overwrites req */
}
}
- xb_resp_cons = i;
+ blk_ring.rsp_cons = i;
if (sc && xb_kick_pending) {
xb_kick_pending = FALSE;
{
struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
- TRACE_ENTER;
-
if (sc == NULL)
return (ENXIO);
s = splbio();
for (bp = bioq_first(&sc->xb_bioq);
- bp && !BLKIF_RING_FULL;
- xb_req_prod++, queued++, bp = bioq_first(&sc->xb_bioq)) {
+ bp && !RING_FULL(&blk_ring);
+ blk_ring.req_prod_pvt++, queued++, bp = bioq_first(&sc->xb_bioq)) {
/* Check if the buffer is properly aligned */
if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
buffer_ma &= ~PAGE_MASK;
/* Fill out a communications ring structure. */
- req = &xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req;
+ req = RING_GET_REQUEST(&blk_ring,
+ blk_ring.req_prod_pvt);
id = GET_ID_FROM_FREELIST();
- xb_rec_ring[id].id= (unsigned long)bp;
+ rec_ring[id].id= (unsigned long)bp;
req->id = id;
req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
req->frame_and_sects[0] = buffer_ma | (fsect << 3) | lsect;
/* Keep a private copy so we can reissue requests when recovering. */
- translate_req_to_pfn( &xb_rec_ring[id], req);
+ translate_req_to_pfn( &rec_ring[id], req);
}
- if (BLKIF_RING_FULL)
+ if (RING_FULL(&blk_ring))
xb_kick_pending = TRUE;
if (queued != 0)
blkif_response_t rsp;
vdisk_t *buf;
- TRACE_ENTER;
-
buf = (vdisk_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
/* Probe for disk information. */
blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
{
unsigned long flags, id;
+ blkif_request_t *req_d;
retry:
- while ( (xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE ) {
+ while ( RING_FULL(&blk_ring) )
+ {
tsleep( req, PWAIT | PCATCH, "blkif", hz);
}
mtx_lock_irqsave(&blkif_io_lock, flags);
- if ( (xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE )
+ if ( RING_FULL(&blk_ring) )
{
mtx_unlock_irqrestore(&blkif_io_lock, flags);
goto retry;
}
- xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req = *req;
+ req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+ *req_d = *req;
id = GET_ID_FROM_FREELIST();
- xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req.id = id;
- xb_rec_ring[id].id = (unsigned long) req;
+ req_d->id = id;
+ rec_ring[id].id = (unsigned long) req;
- translate_req_to_pfn( &xb_rec_ring[id], req );
+ translate_req_to_pfn( &rec_ring[id], req );
- xb_req_prod++;
+ blk_ring.req_prod_pvt++;
flush_requests();
mtx_unlock_irqrestore(&blkif_io_lock, flags);
blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
msg->handle = 0;
- msg->shmem_frame = (vtomach(xb_blk_ring) >> PAGE_SHIFT);
+ msg->shmem_frame = (vtomach(blk_ring.sring) >> PAGE_SHIFT);
ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
mtx_unlock_irqrestore(&blkif_io_lock, flags);
/* Free resources associated with old device channel. */
- if (xb_blk_ring) {
- free(xb_blk_ring, M_DEVBUF);
- xb_blk_ring = NULL;
+ if (blk_ring.sring != NULL) {
+ free(blk_ring.sring, M_DEVBUF);
+ blk_ring.sring = NULL;
}
/* free_irq(blkif_irq, NULL);*/
blkif_irq = 0;
static void
blkif_disconnect(void)
{
- if (xb_blk_ring) free(xb_blk_ring, M_DEVBUF);
- xb_blk_ring = (blkif_ring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
- xb_blk_ring->req_prod = xb_blk_ring->resp_prod = 0;
- xb_resp_cons = xb_req_prod = 0;
+ if (blk_ring.sring) free(blk_ring.sring, M_DEVBUF);
+ blk_ring.sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+ SHARED_RING_INIT(blk_ring.sring);
+ FRONT_RING_INIT(&blk_ring, blk_ring.sring);
blkif_state = BLKIF_STATE_DISCONNECTED;
blkif_send_interface_connect();
}
{
int i;
+ blkif_request_t *req;
/* Hmm, requests might be re-ordered when we re-issue them.
* This will need to be fixed once we have barriers */
/* Stage 1 : Find active and move to safety. */
- for ( i = 0; i < BLKIF_RING_SIZE; i++ ) {
- if ( xb_rec_ring[i].id >= KERNBASE ) {
- translate_req_to_mfn(
- &xb_blk_ring->ring[xb_req_prod].req, &xb_rec_ring[i]);
- xb_req_prod++;
+ for ( i = 0; i < RING_SIZE(&blk_ring); i++ ) {
+ if ( rec_ring[i].id >= KERNBASE ) {
+ req = RING_GET_REQUEST(&blk_ring,
+ blk_ring.req_prod_pvt);
+ translate_req_to_mfn(req, &rec_ring[i]);
+ blk_ring.req_prod_pvt++;
}
}
- printk("blkfront: recovered %d descriptors\n",xb_req_prod);
+ printk("blkfront: recovered %d descriptors\n",blk_ring.req_prod_pvt);
/* Stage 2 : Set up shadow list. */
- for ( i = 0; i < xb_req_prod; i++ ) {
- xb_rec_ring[i].id = xb_blk_ring->ring[i].req.id;
- xb_blk_ring->ring[i].req.id = i;
- translate_req_to_pfn(&xb_rec_ring[i], &xb_blk_ring->ring[i].req);
+ for ( i = 0; i < blk_ring.req_prod_pvt; i++ ) {
+ req = RING_GET_REQUEST(&blk_ring, i);
+ rec_ring[i].id = req->id;
+ req->id = i;
+ translate_req_to_pfn(&rec_ring[i], req);
}
/* Stage 3 : Set up free list. */
- for ( ; i < BLKIF_RING_SIZE; i++ ){
- xb_rec_ring[i].id = i+1;
+ for ( ; i < RING_SIZE(&blk_ring); i++ ){
+ rec_ring[i].id = i+1;
}
- xb_rec_ring_free = xb_req_prod;
- xb_rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
+ rec_ring_free = blk_ring.req_prod_pvt;
+ rec_ring[RING_SIZE(&blk_ring)-1].id = 0x0fffffff;
- /* xb_blk_ring->req_prod will be set when we flush_requests().*/
+ /* blk_ring.req_prod will be set when we flush_requests().*/
wmb();
/* Switch off recovery mode, using a memory barrier to ensure that
printk("[XEN] Initialising virtual block device driver\n");
- xb_rec_ring_free = 0;
- for (i = 0; i < BLKIF_RING_SIZE; i++) {
- xb_rec_ring[i].id = i+1;
+ rec_ring_free = 0;
+ for (i = 0; i < RING_SIZE(&blk_ring); i++) {
+ rec_ring[i].id = i+1;
}
- xb_rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
+ rec_ring[RING_SIZE(&blk_ring)-1].id = 0x0fffffff;
(void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, 0);
}
}
-MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_SPIN);
+MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_SPIN | MTX_NOWITNESS); /* XXX how does one enroll a lock? */
SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_ANY, xb_init, NULL)
static unsigned long evtchn_dev_inuse;
/* Notification ring, accessed via /dev/xen/evtchn. */
-#define RING_SIZE 2048 /* 2048 16-bit entries */
-#define RING_MASK(_i) ((_i)&(RING_SIZE-1))
+
+#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */
+
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
static uint16_t *ring;
static unsigned int ring_cons, ring_prod, ring_overflow;
clear_evtchn(port);
if ( ring != NULL ) {
- if ( (ring_prod - ring_cons) < RING_SIZE ) {
- ring[RING_MASK(ring_prod)] = (uint16_t)port;
+ if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) {
+ ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port;
if ( ring_cons == ring_prod++ ) {
wakeup(evtchn_waddr);
}
}
/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
- if ( ((c ^ p) & RING_SIZE) != 0 ) {
- bytes1 = (RING_SIZE - RING_MASK(c)) * sizeof(uint16_t);
- bytes2 = RING_MASK(p) * sizeof(uint16_t);
+ if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) {
+ bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t);
+ bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t);
}
else {
bytes1 = (p - c) * sizeof(uint16_t);
bytes2 = count - bytes1;
}
- if ( uiomove(&ring[RING_MASK(c)], bytes1, uio) ||
+ if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) ||
((bytes2 != 0) && uiomove(&ring[0], bytes2, uio)))
/* keeping this around as its replacement is not equivalent
* copyout(&ring[0], &buf[bytes1], bytes2)
netctrl_connected(void)
{
int ok;
-
+ XENPRINTF("err %d up %d\n", netctrl.err, netctrl.up);
if (netctrl.err)
ok = netctrl.err;
else if (netctrl.up == NETIF_DRIVER_STATUS_UP)
= INVALID_P2M_ENTRY;
xn_rx_mcl[i].op = __HYPERVISOR_update_va_mapping;
- xn_rx_mcl[i].args[0] = (unsigned long)mtod(m_new,vm_offset_t)
- >> PAGE_SHIFT;
+ xn_rx_mcl[i].args[0] = (unsigned long)mtod(m_new,vm_offset_t);
xn_rx_mcl[i].args[1] = 0;
xn_rx_mcl[i].args[2] = 0;
mmu->val = (unsigned long)m->m_ext.ext_args >> PAGE_SHIFT;
mmu++;
mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = (unsigned long)m->m_data >> PAGE_SHIFT;
+ mcl->args[0] = (unsigned long)m->m_data;
mcl->args[1] = (rx->addr & ~PAGE_MASK) | PG_KERNEL;
mcl->args[2] = 0;
mcl++;
static void
netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
-
switch ( msg->subtype )
{
case CMSG_NETIF_FE_INTERFACE_STATUS:
break;
}
- ctrl_if_send_response(msg);
+ ctrl_if_send_response(msg);
}
#if 1
{
int err = 0, conn = 0;
int wait_i, wait_n = 100;
-
for ( wait_i = 0; wait_i < wait_n; wait_i++)
{
XENPRINTF("> wait_i=%d\n", wait_i);
{
int err = 0;
-
+
netctrl_init();
(void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
CALLBACK_IN_BLOCKING_CONTEXT);